Xingming Qu, Bowei Tian
In this lab, we will use the implementation from https://github.com/ykamikawa/tf-keras-SegNet/blob/master/layers.py
from keras import backend as K
from keras.layers import Layer
from keras.models import Model, Sequential, load_model
from keras.models import Model
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D, Input
from keras.utils.data_utils import get_file
import keras.backend as K
import h5py
import numpy as np
import tensorflow as tf
from keras.models import Model, Sequential, load_model
import keras.backend as K
from vgg import VGG19, preprocess_input
from decoder import decoder_layers
import sys
import os
from vgg import VGG19, preprocess_input
from decoder import decoder_layers
import sys
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.callbacks import Callback
from scipy.misc import imresize, imsave
import numpy as np
from model import EncoderDecoder
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.callbacks import Callback
from scipy.misc import imresize, imsave
## create the custom layers (argmax pooling / unpooling)
class MaxPoolingWithArgmax2D(Layer):
    """2-D max pooling that also returns the argmax of every pooled window.

    SegNet-style pooling: the argmax "mask" records which input position won
    each pooling window so that MaxUnpooling2D can later scatter the pooled
    values back to their original locations.
    Follows the Keras custom-layer API (adapted from the Keras source code):
    __init__, call, compute_output_shape, compute_mask.
    """
    ## initialization
    def __init__(
            self,
            pool_size=(2, 2),
            strides=(2, 2),
            padding='same',
            **kwargs):
        # pool_size/strides are (height, width); padding is 'same' or 'valid'
        super(MaxPoolingWithArgmax2D, self).__init__(**kwargs)
        self.padding = padding
        self.pool_size = pool_size
        self.strides = strides

    # this is where the layer's logic lives
    def call(self, inputs, **kwargs):
        padding = self.padding
        pool_size = self.pool_size
        strides = self.strides
        if K.backend() == 'tensorflow':
            # build the 4-D ksize/strides expected by tf.nn.max_pool_with_argmax
            ksize = [1, pool_size[0], pool_size[1], 1]
            padding = padding.upper()  # TF expects 'SAME'/'VALID'
            strides = [1, strides[0], strides[1], 1]
            # max-pool and keep the flattened indices of the maxima (the mask)
            output, argmax = K.tf.nn.max_pool_with_argmax(
                inputs,
                ksize=ksize,
                strides=strides,
                padding=padding)
        else:
            # only the TensorFlow backend exposes max_pool_with_argmax
            errmsg = '{} backend is not supported for layer {}'.format(
                K.backend(), type(self).__name__)
            raise NotImplementedError(errmsg)
        # cast so both outputs share the model's float dtype
        argmax = K.cast(argmax, K.floatx())
        return [output, argmax]

    # In case your layer modifies the shape of its input, specify the shape
    # transformation logic here; this allows Keras to do automatic shape
    # inference.
    def compute_output_shape(self, input_shape):
        # NOTE(review): halves H and W regardless of self.pool_size — this
        # assumes the default (2, 2) pooling; confirm before using other sizes.
        ratio = (1, 2, 2, 1)
        output_shape = [
            dim//ratio[idx]
            if dim is not None else None
            for idx, dim in enumerate(input_shape)]
        output_shape = tuple(output_shape)
        return [output_shape, output_shape]

    def compute_mask(self, inputs, mask=None):
        # two outputs, neither carries a Keras mask
        return 2 * [None]
class MaxUnpooling2D(Layer):
    """Undo MaxPoolingWithArgmax2D: scatter the pooled values back to the
    positions recorded in the argmax mask; all other positions become 0.
    """
    def __init__(self, size=(2, 2), **kwargs):
        super(MaxUnpooling2D, self).__init__(**kwargs)
        # upsampling factor per spatial dimension (height, width)
        self.size = size

    def call(self, inputs, output_shape=None):
        # inputs: [pooled values, pooling mask from MaxPoolingWithArgmax2D]
        updates, mask = inputs[0], inputs[1]
        with K.tf.variable_scope(self.name):
            # the mask holds flattened argmax indices -> work in int32
            mask = K.cast(mask, 'int32')
            # dynamic shape of the pooled tensor, a (4,) int tensor
            input_shape = K.tf.shape(updates, out_type='int32')
            # calculate the new (unpooled) shape:
            # only height (input_shape[1]) and width (input_shape[2]) grow
            if output_shape is None:
                output_shape = (
                    input_shape[0],
                    input_shape[1]*self.size[0],
                    input_shape[2]*self.size[1],
                    input_shape[3])
            self.output_shape1 = output_shape
            # calculate scatter indices for batch, height, width and channels
            # all-ones tensor shaped like the mask, used to broadcast ranges
            one_like_mask = K.ones_like(mask, dtype='int32')
            batch_shape = K.concatenate(
                [[input_shape[0]], [1], [1], [1]],
                axis=0)
            batch_range = K.reshape(
                K.tf.range(output_shape[0], dtype='int32'),
                shape=batch_shape)
            b = one_like_mask * batch_range
            # recover (y, x) coordinates from the flattened argmax index
            y = mask // (output_shape[2] * output_shape[3])
            x = (mask // output_shape[3]) % output_shape[2]
            feature_range = K.tf.range(output_shape[3], dtype='int32')
            f = one_like_mask * feature_range
            # transpose indices & reshape update values to one dimension
            updates_size = K.tf.size(updates)
            indices = K.transpose(K.reshape(
                K.stack([b, y, x, f]),
                [4, updates_size]))
            values = K.reshape(updates, [updates_size])
            # scatter the pooled values into a zero tensor of the full shape
            ret = K.tf.scatter_nd(indices, values, output_shape)
        return ret

    def compute_output_shape(self, input_shape):
        # input_shape is a list: [values_shape, mask_shape]
        mask_shape = input_shape[1]
        return (
            mask_shape[0],
            mask_shape[1]*self.size[0],
            mask_shape[2]*self.size[1],
            mask_shape[3]
        )
We will use the dataset from https://github.com/yuweiming70/Landscape-Dataset, which includes 7268 landscape photos of size 320x180.
We will train two models with unpooling.
# we create two models
# the first one is a 2layer-2layer encoder-decoder
# the second one is a 4layer-4layer encoder-decoder
def first_model():
    """2-layer encoder / 2-layer decoder autoencoder using argmax pooling and
    unpooling (SegNet style). Input and output are 256x256x3 images.
    """
    inputs = Input(shape=(256, 256, 3))
    pool_size = (2, 2)

    def relu_conv(filters, name):
        # same-padded 3x3 ReLU convolution, keeping the VGG naming scheme
        return Conv2D(filters, (3, 3), activation='relu', padding='same',
                      name=name)

    # ---- encoder ----
    x = relu_conv(64, 'block1_conv1')(inputs)
    x = relu_conv(64, 'block1_conv2')(x)
    x, mask_1 = MaxPoolingWithArgmax2D(pool_size)(x)
    x = relu_conv(128, 'block2_conv1')(x)
    x = relu_conv(128, 'block2_conv2')(x)
    x, mask_2 = MaxPoolingWithArgmax2D(pool_size)(x)
    # ---- decoder (mirror of the encoder; masks restore max locations) ----
    x = MaxUnpooling2D(pool_size)([x, mask_2])
    x = relu_conv(128, 'decoder_block2_conv2')(x)
    x = relu_conv(64, 'decoder_block2_conv1')(x)
    x = MaxUnpooling2D(pool_size)([x, mask_1])
    x = relu_conv(64, 'decoder_block1_conv2')(x)
    output = relu_conv(3, 'decoder_out')(x)
    return Model(inputs, output, name='decodermy')
def second_model():
    """4-layer encoder / 4-layer decoder autoencoder using argmax pooling and
    unpooling (SegNet style). Input and output are 256x256x3 images.
    """
    inputs = Input(shape=(256, 256, 3))
    pool_size = (2, 2)

    def relu_conv(filters, name):
        # same-padded 3x3 ReLU convolution, keeping the VGG naming scheme
        return Conv2D(filters, (3, 3), activation='relu', padding='same',
                      name=name)

    # ---- encoder ----
    x = relu_conv(64, 'block1_conv1')(inputs)
    x = relu_conv(64, 'block1_conv2')(x)
    x, mask_1 = MaxPoolingWithArgmax2D(pool_size)(x)
    x = relu_conv(128, 'block2_conv1')(x)
    x = relu_conv(128, 'block2_conv2')(x)
    x, mask_2 = MaxPoolingWithArgmax2D(pool_size)(x)
    x = relu_conv(256, 'block3_conv1')(x)
    x = relu_conv(256, 'block3_conv2')(x)
    x = relu_conv(256, 'block3_conv3')(x)
    x, mask_3 = MaxPoolingWithArgmax2D(pool_size)(x)
    x = relu_conv(512, 'block4_conv1')(x)
    x = relu_conv(512, 'block4_conv2')(x)
    x = relu_conv(512, 'block4_conv3')(x)
    x, mask_4 = MaxPoolingWithArgmax2D(pool_size)(x)
    # ---- decoder (mirror of the encoder; masks restore max locations) ----
    x = MaxUnpooling2D(pool_size)([x, mask_4])
    x = relu_conv(512, 'decoder_block4_conv3')(x)
    x = relu_conv(512, 'decoder_block4_conv2')(x)
    x = relu_conv(256, 'decoder_block4_conv1')(x)
    x = MaxUnpooling2D(pool_size)([x, mask_3])
    x = relu_conv(256, 'decoder_block3_conv3')(x)
    x = relu_conv(256, 'decoder_block3_conv2')(x)
    x = relu_conv(128, 'decoder_block3_conv1')(x)
    x = MaxUnpooling2D(pool_size)([x, mask_2])
    x = relu_conv(128, 'decoder_block2_conv2')(x)
    x = relu_conv(64, 'decoder_block2_conv1')(x)
    x = MaxUnpooling2D(pool_size)([x, mask_1])
    x = relu_conv(64, 'decoder_block1_conv2')(x)
    output = relu_conv(3, 'decoder_out')(x)
    return Model(inputs, output, name='decodermy')
# training configuration
TRAIN_PATH = 'data'
TARGET_SIZE = (256, 256)
BATCH_SIZE = 16
# Create ImageDataGenerator
# NOTE(review): this module-level `gen` is shadowed by gen = create_gen(...)
# further down and appears to be unused dead code.
datagen = ImageDataGenerator()
gen = datagen.flow_from_directory(TRAIN_PATH, target_size=TARGET_SIZE,
                                  batch_size=BATCH_SIZE, class_mode=None)
def create_gen(img_dir, target_size, batch_size):
    """Return an infinite generator of (X, y) pairs for autoencoder training,
    where the target equals the input image."""
    flow = ImageDataGenerator().flow_from_directory(
        img_dir, target_size=target_size,
        batch_size=batch_size, class_mode=None)

    def tuple_gen():
        for batch in flow:
            # skip the smaller final batch of each epoch
            if batch.shape[0] == batch_size:
                # (X, y) — reconstruct the input itself
                yield (batch, batch)

    return tuple_gen()
# This needs to be in scope where model is defined
class OutputPreview(Callback):
    """Keras callback that periodically saves the model's reconstruction of a
    fixed test image so training progress can be inspected visually.

    Parameters:
        model: the Keras model used for prediction.
        test_img_path: path of the image to reconstruct.
        increment: save a preview every `increment` batches.
        preview_dir_path: directory where the preview jpgs are written.
    """
    def __init__(self, model, test_img_path, increment, preview_dir_path):
        # Fix: the original skipped Callback.__init__, leaving base-class
        # state uninitialized; always chain up in a Callback subclass.
        super(OutputPreview, self).__init__()
        test_img = image.load_img(test_img_path)
        test_img = imresize(test_img, (256, 256, 3))
        test_target = image.img_to_array(test_img)
        test_target = np.expand_dims(test_target, axis=0)  # add batch dim
        self.test_img = test_target
        self.model = model
        self.preview_dir_path = preview_dir_path
        self.increment = increment
        self.iteration = 0

    def on_batch_end(self, batch, logs=None):
        # Fix: `logs={}` was a mutable default argument; Keras passes logs
        # explicitly and its Callback signature uses logs=None.
        if (self.iteration % self.increment == 0):
            output_img = self.model.predict(self.test_img)[0]
            fname = '%d.jpg' % self.iteration
            out_path = os.path.join(self.preview_dir_path, fname)
            imsave(out_path, output_img)
        self.iteration += 1
# infinite (X, y) generator over the landscape images
gen = create_gen(TRAIN_PATH, TARGET_SIZE, BATCH_SIZE)
# total number of images in the dataset
num_samples = 7268
steps_per_epoch = num_samples // BATCH_SIZE
# encoder_decoder =first_model()
# encoder_decoder.compile(optimizer='adam', loss='mse')
# # encoder_decoder.summary()
# target_layer=2
# callbacks = [OutputPreview(encoder_decoder, 'data/land/0.jpg', 300, 'upout/%d' % target_layer)]
# epochs = 10
# encoder_decoder.fit_generator(gen, steps_per_epoch=steps_per_epoch,
# epochs=epochs, callbacks=callbacks)
# encoder_decoder.save('first_model.h5')
# encoder_decoder =second_model()
# encoder_decoder.compile(optimizer='adam', loss='mse')
# # encoder_decoder.summary()
# target_layer=4
# callbacks = [OutputPreview(encoder_decoder, 'data/land/3.jpg', 300, 'upout/%d' % target_layer)]
# epochs = 10
# encoder_decoder.fit_generator(gen, steps_per_epoch=steps_per_epoch,
# epochs=epochs, callbacks=callbacks)
# encoder_decoder.save('second_model.h5')
I also tried to train two encoder-decoders using upsampling. But I found the sample VGG code seems incorrect: the encoder and decoder were not symmetrical. I modified vgg.py and decoder.py; I will upload them to the canvas. Again, I trained a 2layer-2layer encoder-decoder and a 4layer-4layer encoder-decoder. The difference between these two and the previous two is the unpooling/upsampling layer. You can find the training details in Attachment1.html
So there are 4 models in total:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import warnings
# silence all warnings globally (e.g. deprecation noise from scipy/keras)
warnings.filterwarnings("ignore")
# input a image then pass it to the model to get reconstruction
# feed an image through a model and return its reconstruction
def get_reconstruction(model, img_path):
    """Load an image, run it through `model`, and return the reconstruction
    as an integer array clipped to the displayable 0-255 range."""
    frame = mpimg.imread(img_path)
    # preprocessing: resize to the network's 256x256 input size
    frame = cv2.resize(frame, (256, 256))
    # add the batch dimension
    batch = np.expand_dims(frame, axis=0)
    # predict, then drop the batch dimension again
    recon = model.predict(batch)
    recon = np.squeeze(recon)
    # cast to int, then clamp pixel values into 0-255
    recon = recon.astype(int)
    recon = np.clip(recon, 0, 255)
    return recon
from keras.models import load_model
import cv2
# load the 4 trained models
# m1 = unPooling, 2 layers
m1 = first_model()
m1.load_weights('first_model.h5')
# m2 = unPooling, 4 layers
m2 = second_model()
m2.load_weights('second_model.h5')
# encoder_decoder_2 = Upsampling, 2 layers (encoder/decoder weights stored separately)
encoder_decoder_2 = EncoderDecoder(target_layer=2)
encoder_decoder_2.encoder.load_weights('encoder_2.h5')
encoder_decoder_2.decoder.load_weights('decoder_2.h5')
# encoder_decoder_4 = Upsampling, 4 layers
encoder_decoder_4 = EncoderDecoder(target_layer=4)
encoder_decoder_4.encoder.load_weights('encoder_4.h5')
encoder_decoder_4.decoder.load_weights('decoder_4.h5')
def show_result(image):
    """Plot the original image and its reconstruction from all four models."""
    # original image on its own figure
    plt.figure(figsize=(8, 6))
    plt.imshow(mpimg.imread(image))
    plt.title('Original img')
    # the two unpooling models side by side
    plt.figure(figsize=(11, 11))
    plt.subplot(1, 2, 1)
    plt.imshow(get_reconstruction(m1, image))
    plt.title('using unPooling 2 layer encoder_decoder')
    plt.subplot(1, 2, 2)
    plt.imshow(get_reconstruction(m2, image))
    plt.title('using unPooling 4 layer encoder_decoder')
    # the two upsampling models side by side
    plt.figure(figsize=(11, 11))
    plt.subplot(1, 2, 1)
    plt.imshow(get_reconstruction(encoder_decoder_2.model, image))
    plt.title('using Upsampling 2 layer encoder_decoder')
    plt.subplot(1, 2, 2)
    plt.imshow(get_reconstruction(encoder_decoder_4.model, image))
    plt.title('using Upsampling 4 layer encoder_decoder')
# show reconstructions for a few sample images
for im in ('data/land/3.jpg', 'data/land/2.jpg', 'data/land/50.jpg'):
    show_result(im)
I think unPooling 2 and Upsampling 2 both look good!
The reconstruction of the unPooling 2-layer encoder_decoder has some graininess (it might be because unpooling uses zero padding), which means it is not as smooth as the Upsampling 2-layer encoder_decoder. So I think the Upsampling 2-layer encoder_decoder is better.
The unPooling 2-layer encoder_decoder is much better. It seems like the unPooling 4-layer model is still underfitting; because the 4-layer model has more parameters, it needs more epochs to converge.
It is obvious that both Upsampling 4 and unPooling 4 did not converge. Compared to Upsampling 4, unPooling 4 restores more details, while Upsampling 4 is smoother.
def wct(content, style, alpha=0.9, eps=1e-3):
    """Whiten-Color Transform on feature maps, in numpy.

    Adapted from https://github.com/eridgd/WCT-TF/blob/master/ops.py
    See p.4 of the Universal Style Transfer paper for the equations:
    https://arxiv.org/pdf/1705.08086.pdf

    content, style: 1xHxWxC feature maps; returns a 1xHxWxC float32 array
    blending alpha of the stylized features with (1 - alpha) of the
    centered content features.
    """
    # 1xHxWxC -> CxHxW
    c_chw = np.transpose(np.squeeze(content), (2, 0, 1))
    s_chw = np.transpose(np.squeeze(style), (2, 0, 1))
    # number of spatial positions per channel
    n_c = c_chw.shape[1] * c_chw.shape[2]
    n_s = s_chw.shape[1] * s_chw.shape[2]
    # CxHxW -> Cx(H*W)
    c_flat = c_chw.reshape(-1, n_c)
    s_flat = s_chw.reshape(-1, n_s)
    # --- whitening transform of the content features ---
    mu_c = c_flat.mean(axis=1, keepdims=True)
    fc = c_flat - mu_c
    cov_c = np.dot(fc, fc.T) / (n_c - 1)
    Ec, wc, _ = np.linalg.svd(cov_c)
    k_c = (wc > 1e-5).sum()                      # keep only significant eigenvalues
    Dc = np.diag((wc[:k_c] + eps) ** -0.5)
    fc_hat = Ec[:, :k_c].dot(Dc).dot(Ec[:, :k_c].T).dot(fc)
    # --- coloring transform with the style statistics ---
    mu_s = s_flat.mean(axis=1, keepdims=True)
    fs = s_flat - mu_s
    cov_s = np.dot(fs, fs.T) / (n_s - 1)
    Es, ws, _ = np.linalg.svd(cov_s)
    k_s = (ws > 1e-5).sum()
    Ds = np.sqrt(np.diag(ws[:k_s] + eps))
    fcs_hat = Es[:, :k_s].dot(Ds).dot(Es[:, :k_s].T).dot(fc_hat)
    fcs_hat = fcs_hat + mu_s                     # re-add the style mean
    # blend stylized features with the (centered) content features
    blended = alpha * fcs_hat + (1 - alpha) * (fc)
    # Cx(H*W) -> CxHxW -> 1xHxWxC
    blended = blended.reshape(c_chw.shape)
    blended = np.expand_dims(np.transpose(blended, (1, 2, 0)), 0)
    return np.float32(blended)
def preprocessing_img(img_path):
    """Load an image, resize it to the 256x256 VGG input size, and add a
    batch dimension."""
    arr = mpimg.imread(img_path)
    arr = cv2.resize(arr, (256, 256))
    return np.expand_dims(arr, axis=0)
def tansfer(img_c, img_s, alpha):
    """Style transfer with the 2-layer upsampling encoder-decoder: encode
    content and style, apply WCT, decode, and return a 0-255 int image."""
    # encode content image I_C and style image I_S to feature maps
    feats_c = encoder_decoder_2.encoder.predict(img_c)
    feats_s = encoder_decoder_2.encoder.predict(img_s)
    # blend style statistics into the content features
    feats_cs = wct(feats_c, feats_s, alpha)
    # decode the transformed features back to image space
    out = encoder_decoder_2.decoder.predict(feats_cs)
    # drop the batch dimension and clamp to displayable pixel values
    out = np.squeeze(out)
    out = out.astype(int)
    out = np.clip(out, 0, 255)
    return out
def show_result(img_c_path, img_s_path, alpha, outname):
    """Run style transfer and plot content, style and transfer images.

    NOTE: this redefines the earlier reconstruction-demo show_result.
    Saves the raw network output as out.jpg and the resized result as
    outname + '.jpg'.
    """
    content_batch = preprocessing_img(img_c_path)
    style_batch = preprocessing_img(img_s_path)
    out = tansfer(content_batch, style_batch, alpha)
    imsave('out.jpg', out)
    plt.figure(figsize=(20, 20))
    plt.subplot(1, 3, 1)
    I_c = mpimg.imread(img_c_path)
    plt.imshow(I_c)
    plt.title('Content IMG')
    plt.subplot(1, 3, 2)
    I_s = mpimg.imread(img_s_path)
    plt.imshow(I_s)
    plt.title('Style IMG')
    # reload the saved jpg and resize it back to the style image's size
    newout = mpimg.imread('out.jpg')
    newout = cv2.resize(newout, (I_s.shape[1], I_s.shape[0]))
    imsave(outname + '.jpg', newout)
    plt.subplot(1, 3, 3)
    plt.imshow(newout)
    plt.title('Transfer IMG')
# run the style-transfer demo on four content/style pairs
for label, c_path, s_path, a, out in (
        ('Summer Lake --> Winter Lake',
         'data/land/50.jpg', 'data/land/3.jpg', 0.9, 'Tansfer50'),
        ('Desert Pyramid --> Oasis Pyramid',
         'data/land/1765.jpg', 'data/land/1581.jpg', 0.9, 'Tansfer1765'),
        ('Daytime Building --> Night Building',
         'data/land/2212.jpg', 'data/land/2226.jpg', 0.5, 'Tansfer2212'),
        ('Color the background',
         'data/land/4944.jpg', 'data/land/5228.jpg', 0.9, 'Tansfer4944')):
    print(label)
    show_result(c_path, s_path, a, out)
The results of the whitening and coloring transform look good! However, these four pictures all have more or less artifacts at the edges — especially the edges of the building in the third picture and the shadow on both sides of the bridge in the first picture. Besides, some details in the pictures are missing; for example, the outlines of the building's windows are not obvious. In other words, the result becomes blurred.
In this part, we will use the existing code for the smoothing constraint from https://github.com/NVIDIA/FastPhotoStyle
Specifically, we will use https://github.com/NVIDIA/FastPhotoStyle/blob/master/photo_smooth.py
I tried to understand their code, but it is very hard... if you look at the above link, they only wrote a single line of comments!
import scipy.misc
import numpy as np
import scipy.sparse
import scipy.sparse.linalg
from numpy.lib.stride_tricks import as_strided
from PIL import Image
# the author created a class called Propagator to do the smoothing,
# and this propagation step is the computational bottleneck of photorealistic image stylization
class Propagator():
    """Photorealistic smoothing step from NVIDIA FastPhotoStyle
    (photo_smooth.py): propagates the stylized colors across the image,
    guided by a matting-Laplacian affinity matrix built from the content
    image, so that the result stays locally consistent with the content.
    """
    def __init__(self, beta=0.9999):
        super(Propagator, self).__init__()
        # beta trades smoothness (propagation) against fidelity to the
        # stylized input
        self.beta = beta

    def process(self, initImg, contentImg):
        """Smooth initImg (the stylized output) guided by contentImg.

        Both arguments may be file paths or image arrays; returns a PIL Image.
        """
        # read the content image (path or array)
        if type(contentImg) == str:
            content = scipy.misc.imread(contentImg, mode='RGB')
        else:
            content = contentImg.copy()
        # content = scipy.misc.imread(contentImg, mode='RGB')
        # read the un-smoothed (stylized) image as B, scaling pixels to 0-1
        if type(initImg) == str:
            B = scipy.misc.imread(initImg, mode='RGB').astype(np.float64) / 255
        else:
            B = scipy.asarray(initImg).astype(np.float64) / 255
        # B = scipy.misc.imread(initImg, mode='RGB').astype(np.float64)/255
        # shape of the un-smoothed image
        h1,w1,k = B.shape
        # crop 2 pixels on every side: do not use edge pixels
        h = h1 - 4
        w = w1 - 4
        B = B[int((h1-h)/2):int((h1-h)/2+h),int((w1-w)/2):int((w1-w)/2+w),:]
        # finished cutting the edge
        # make the content image the same shape as the cropped stylized image
        content = scipy.misc.imresize(content,(h,w))
        # pad the un-smoothed image back to its original shape
        B = self.__replication_padding(B,2)
        # pad the content image back to its original shape
        content = self.__replication_padding(content,2)
        # scale all pixels to 0-1
        content = content.astype(np.float64)/255
        # flatten to (pixels, channels)
        B = np.reshape(B,(h1*w1,k))
        # compute the affinity matrix W of the content image using the
        # matting Laplacian, formula (15) of
        # http://webee.technion.ac.il/people/anat.levin/papers/Matting-Levin-Lischinski-Weiss-CVPR06.pdf
        W = self.__compute_laplacian(content)
        # convert to Compressed Sparse Column format
        # (duplicate entries are summed together)
        W = W.tocsc()
        # column sums along dimension [0] -> degree of each pixel,
        # then take elementwise degree^(-1/2)
        dd = W.sum(0)
        dd = np.sqrt(np.power(dd,-1))
        dd = dd.A.squeeze()
        # sparse diagonal matrix holding D^(-1/2)
        D = scipy.sparse.csc_matrix((dd, (np.arange(0,w1*h1), np.arange(0,w1*h1)))) # 0.026
        # normalized affinity S = D^(-1/2) W D^(-1/2); the paper writes
        # D ** -(1/2), which is equivalent for a diagonal D, so the result
        # should not differ
        S = D.dot(W).dot(D)
        # A = I - beta*S, with a sparse identity of shape (w1*h1, w1*h1)
        A = scipy.sparse.identity(w1*h1) - self.beta*S
        A = A.tocsc()
        # pre-factorize A so the per-channel solves reuse the factorization;
        # the returned solver takes an ndarray of shape (N,)
        solver = scipy.sparse.linalg.factorized(A)
        # solve (I - beta*S) V = B for each color channel
        V = np.zeros((h1*w1,k))
        V[:,0] = solver(B[:,0])
        V[:,1] = solver(B[:,1])
        V[:,2] = solver(B[:,2])
        V = V*(1-self.beta)
        V = V.reshape(h1,w1,k)
        # drop the 2-pixel replication padding again
        V = V[2:2+h,2:2+w,:]
        # back to 0-255 uint8 for display/saving
        img = Image.fromarray(np.uint8(np.clip(V * 255., 0, 255.)))
        return img

    # Returns the sparse matting Laplacian of img
    def __compute_laplacian(self, img, eps=10**(-7), win_rad=1):
        # win_size == 9 for the default 3x3 windows
        win_size = (win_rad*2+1)**2
        # content image shape
        h, w, d = img.shape
        # interior size: c_h = h-2, c_w = w-2 (edge pixels are not window
        # centers)
        c_h, c_w = h - 2*win_rad, w - 2*win_rad
        win_diam = win_rad*2+1 # win_diam == 3
        # index of each pixel; e.g. np.arange(2*3).reshape((2, 3)) gives
        # [[0, 1, 2], [3, 4, 5]]
        indsM = np.arange(h*w).reshape((h, w))
        # flatten to (pixels, channels)
        ravelImg = img.reshape(h*w, d)
        # create sliding windows of pixel indices
        win_inds = self.__rolling_block(indsM, block=(win_diam, win_diam))
        # flatten each sliding window
        win_inds = win_inds.reshape(c_h, c_w, win_size)
        # gather the pixel colors of every window
        winI = ravelImg[win_inds]
        # mu and var are the mean and covariance of the intensities in the
        # window wk around each pixel k
        win_mu = np.mean(winI, axis=2, keepdims=True)
        win_var = np.einsum('...ji,...jk ->...ik', winI, winI)/win_size - np.einsum('...ji,...jk ->...ik', win_mu, win_mu)
        # regularized inverse covariance per window
        inv = np.linalg.inv(win_var + (eps/win_size)*np.eye(3))
        # batched matrix products via the Einstein summation convention
        X = np.einsum('...ij,...jk->...ik', winI - win_mu, inv)
        vals = (1/win_size)*(1 + np.einsum('...ij,...kj->...ik', X, winI - win_mu))
        # flatten window index pairs into COO row/column/value triplets
        # (ravel returns a contiguous flattened 1-D array)
        nz_indsCol = np.tile(win_inds, win_size).ravel()
        nz_indsRow = np.repeat(win_inds, win_size).ravel()
        nz_indsVal = vals.ravel()
        L = scipy.sparse.coo_matrix((nz_indsVal, (nz_indsRow, nz_indsCol)), shape=(h*w, h*w))
        return L

    # Pad the image by edge replication, since we need the 8 neighbours of
    # every pixel; this lets the edge pixels be handled too.
    def __replication_padding(self, arr,pad):
        h,w,c = arr.shape
        ans = np.zeros((h+pad*2,w+pad*2,c))
        for i in range(c):
            ans[:,:,i] = np.pad(arr[:,:,i],pad_width=(pad,pad),mode='edge')
        return ans

    # !! this function creates sliding windows !!
    # returns a zero-copy strided view of all block-sized windows of A
    def __rolling_block(self, A, block=(3, 3)):
        shape = (A.shape[0] - block[0] + 1, A.shape[1] - block[1] + 1) + block
        strides = (A.strides[0], A.strides[1]) + A.strides
        return as_strided(A, shape=shape, strides=strides)
The test code below is used to understand the Propagator code.
I extracted the functions from the Propagator class, ran them, and inspected their output.
Feel free to skip it.
# small 4x4 index matrix like the one __compute_laplacian builds
indsM = np.arange(4*4).reshape((4, 4))
indsM
# flatten a real 180x320 image to (pixels, channels) like ravelImg does
img = cv2.imread('data/land/50.jpg')
ravelImg = img.reshape(180*320, 3)
ravelImg.shape
# inspect numpy strides on a 5x4 array
x = np.arange(5*4).reshape((5, 4))
print(x)
print(x.strides)
# default dtype is int64, so each element is 8 bytes; with 4 columns the row stride is 4x8
More about strides and as_strided: http://www.jessicayung.com/numpy-arrays-memory-and-strides/
and http://arogozhnikov.github.io/2015/09/30/NumpyTipsAndTricks2.html
## !! this function is used to create sliding windows!!
def rolling_block(A, block=(3, 3)):
    """Verbose copy of Propagator.__rolling_block: return a zero-copy strided
    view of all block-sized sliding windows of A, printing the shape/stride
    arithmetic along the way."""
    print("A.shape", A.shape)
    # one window per valid top-left corner, each of size `block`
    out_shape = (A.shape[0] - block[0] + 1, A.shape[1] - block[1] + 1) + block
    print('shape', out_shape)
    print((A.strides[0], A.strides[1]))
    print(A.strides)
    # moving a window by one row/col uses the same strides as moving in A
    out_strides = (A.strides[0], A.strides[1]) + A.strides
    print(out_strides)
    return as_strided(A, shape=out_shape, strides=out_strides)
# sliding windows over the 4x4 index matrix from above
RB = rolling_block(indsM)
print(indsM)
print(RB)
print(RB.shape)
# re-run the first half of __compute_laplacian step by step on a toy image
eps = 10**(-7)
win_rad = 1
img = np.arange(5*4*3).reshape((5, 4,3))
print(img.shape)
win_size = (win_rad*2+1)**2
# get content image shape
h, w, d = img.shape
# interior size: c_h = h-2, c_w = w-2 (edge pixels are not window centers)
c_h, c_w = h - 2*win_rad, w - 2*win_rad
win_diam = win_rad*2+1 # win_diam == 3
# index of each pixel; e.g. np.arange(2*3).reshape((2, 3)) gives
# [[0, 1, 2], [3, 4, 5]]
indsM = np.arange(h*w).reshape((h, w))
# flatten to (pixels, channels)
ravelImg = img.reshape(h*w, d)
# create sliding windows of pixel indices
print('indsM',indsM)
win_inds = rolling_block(indsM, block=(win_diam, win_diam))
# flatten each sliding window
win_inds = win_inds.reshape(c_h, c_w, win_size)
print(win_inds)
# gather the pixel colors of every window
winI = ravelImg[win_inds]
print(winI)
img.shape
img.sum(0).shape
# small einsum demos used to decode the Laplacian formulas
a = np.arange(25).reshape(5,5)
a
print(np.einsum('ii->i', a))# extract the diagonal (explicit form)
print(np.einsum('ij->i', a)) # sum over an axis (explicit form)
print(np.einsum('...j->...', a)) # implicit-mode sum over the last axis
def show_result_after_smooth(initImg, contentImg):
    """Show the stylized image next to its smoothed version."""
    plt.figure(figsize=(20, 20))
    plt.subplot(1, 2, 1)
    plt.imshow(mpimg.imread(initImg))
    plt.title('Tansfer IMG')
    plt.subplot(1, 2, 2)
    # propagate content-image affinities to smooth the stylized result
    smooth = Propagator().process(initImg, contentImg)
    plt.imshow(smooth)
    plt.title('Tansfer IMG after smooth')
# smooth each saved transfer result against its original content image
for num in ('50', '1765', '2212', '4944'):
    show_result_after_smooth('Tansfer%s.jpg' % num, 'data/land/%s.jpg' % num)
The results are amazing! The edges look more natural, and more details are restored: see the text on the bus and the building behind the bus!